Population GDP Avg Temperature
library(tidyverse)
── Attaching packages ────────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.2.1     ✔ purrr   0.3.3
✔ tibble  2.1.3     ✔ dplyr   0.8.3
✔ tidyr   1.0.2     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(anchors)
Loading required package: rgenoud
## rgenoud (Version 5.8-3.0, Build Date: 2019-01-22)
## See http://sekhon.berkeley.edu/rgenoud for additional documentation.
## Please cite software as:
## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011.
## ``Genetic Optimization Using Derivatives: The rgenoud package for R.''
## Journal of Statistical Software, 42(11): 1-26.
##
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
## anchors (Version 3.0-8, Build Date: 2014-02-24)
## See http://wand.stanford.edu/anchors for additional documentation and support.
library(moderndive)
Code to Set Up our CSV file
The code in this chunk below sums up all the cases for countries that are broken into regions so we can have one value per row for those countries. We had to do this for the US, China, Canada, and Australia.
virus = read_csv("covid_19_clean_complete.csv")
Parsed with column specification:
cols(
Province_State = col_character(),
Country_Region = col_character(),
Lat = col_double(),
Long = col_double(),
Date = col_date(format = ""),
Confirmed = col_double(),
Deaths = col_double(),
Recovered = col_double(),
Active = col_double(),
`WHO Region` = col_character()
)
virus <- replace.value(virus,"Country_Region",from = "US",to = "United States") #Replacing value of US with value United States
#US_virus is the data with the cruise ships filtered out and sums up all the cases in each county and combines them into a total number of cases grouped by date
#No_US_virus is the data without the US and is just for all the other countries
#country_virus is the final dataset grouped by country
#US_virus = virus %>% filter(Country_Region == "US") %>% filter(Province_State != "Grand Princess Cruise Ship",Province_State != "Omaha, NE (From Diamond Princess)",Province_State != "Travis, CA (From Diamond Princess)",Province_State != "Lackland, TX (From Diamond Princess)",Province_State != "Unassigned Location (From Diamond Princess)") %>% group_by(Date) %>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths), Recovered = sum(Recovered)) %>% mutate("Province_State" = "US", "Country_Region" = "US") #OLD DATSET CODE; new dataset broken into cities and countries
#Code for Second Version of Dataset until 3/25
#US_virus = virus %>% filter(Country_Region == "United States")%>% group_by(Date) %>% separate(col = Province_State, into = c("City_or_County","State"),sep = ",") %>% filter(is.na(State)== T)%>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths), Recovered = sum(Recovered)) %>% mutate("Province_State" = "United States", "Country_Region" = "United States")
US_virus = virus %>% filter(Country_Region == "United States")%>% group_by(Date) %>% separate(col = Province_State, into = c("City_or_County","State"),sep = ",") %>% filter(is.na(State)== T)%>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths)) %>% mutate("Province_State" = "United States", "Country_Region" = "United States")
No_US_virus = virus %>% filter(Country_Region != "United States",Country_Region != "China",Country_Region != "Australia",Country_Region != "Canada")
country_virus = full_join(No_US_virus, US_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
country_virus %>% filter(Country_Region == "United States")
#For China
China_virus = virus %>% filter(Country_Region == "China") %>% group_by(Date) %>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths)) %>% mutate("Province_State" = "China", "Country_Region" = "China")
No_China_virus = virus %>% filter(Country_Region != "China", Country_Region != "United States",Country_Region != "Australia",Country_Region != "Canada")
country_virus1 = full_join(No_China_virus, China_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
#For Canada
Canada_virus = virus %>% filter(Country_Region == "Canada") %>% group_by(Date) %>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths)) %>% mutate("Province_State" = "Canada", "Country_Region" = "Canada")
No_Canada_virus = virus %>% filter(Country_Region != "China", Country_Region != "United States",Country_Region != "Australia", Country_Region != "Canada")
country_virus11 = full_join(No_Canada_virus, Canada_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
#For Australia
Australia_virus = virus %>% filter(Country_Region == "Australia")%>% group_by(Date) %>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths)) %>% mutate("Province_State" = "Australia", "Country_Region" = "Australia")
No_Australia_virus = virus %>% filter(Country_Region != "Australia", Country_Region != "United States",Country_Region != "China", Country_Region != "Canada")
country_virus2 = full_join(No_Australia_virus, Australia_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
predata = full_join(country_virus,country_virus1)
Joining, by = c("Province_State", "Country_Region", "Lat", "Long", "Date", "Confirmed", "Deaths", "Recovered", "Active", "WHO Region")
mydata = full_join(predata,country_virus2)
Joining, by = c("Province_State", "Country_Region", "Lat", "Long", "Date", "Confirmed", "Deaths", "Recovered", "Active", "WHO Region")
mydata2 = full_join(mydata,country_virus11)
Joining, by = c("Province_State", "Country_Region", "Lat", "Long", "Date", "Confirmed", "Deaths", "Recovered", "Active", "WHO Region")
#mydata2 %>% filter(Country_Region == "Canada")
mydata2
#GDPandWeather = GDPandWeather %>% rename("Country_Region"= Country)
GDPandWeather
partialdata = left_join(mydata2,GDPandWeather,by = "Country_Region")
country_population_2 = country_population_1 %>% rename("Country_Region"= Country)
ourfinaldata = left_join(partialdata,country_population_2, by = "Country_Region")
Filter by the previous day to make sure we have the most recent data.
ourfinaldata %>% filter(Country_Region == "United States")
ourfinaldata
poster = ourfinaldata %>% dplyr::select(-Province_State,-Recovered)
write_csv(ourfinaldata, "ourfinaldata.csv")
library(lubridate)
Attaching package: ‘lubridate’
The following object is masked from ‘package:base’:
date
Code to Analyze our CSV file
#Formatting the variable date as mdy and ordering it
ourfinaldata$Date <-as.Date(ourfinaldata$Date, format = "%m/%d/%y")
#One Plot of World Deaths
ourfinaldata %>% group_by(Date) %>% summarize(Total = sum(Confirmed,na.rm = TRUE), Dead_count = sum(Deaths,na.rm = TRUE)) %>% ggplot() + geom_line(aes(x = Date, y = Total,group=1, size = Dead_count)) + ggtitle("World Cases and Deaths Graph 1")

#Another Plot of World Deaths
ourfinaldata %>% group_by(Date) %>% summarize(Total = sum(Confirmed,na.rm = TRUE), Dead_count = sum(Deaths,na.rm = TRUE)) %>% ggplot() + geom_line(aes(x = Date, y = Total,group = 1), linetype = "dashed") + geom_line(aes(x = Date, y = Dead_count, group = 1), color = "red")+ ggtitle("World Cases and Deaths Graph 2")

#Deaths in China
ourfinaldata %>% filter(Country_Region == "China") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("China Cases and Deaths")

#Deaths in United States
ourfinaldata %>% filter(Country_Region == "United States") %>% group_by(Date) %>% arrange(Date)%>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("United States Cases and Deaths")

USA_trends = ourfinaldata %>% filter(Country_Region == "United States") %>% group_by(Date) %>% arrange(Date)
write_csv(USA_trends, "USA_trends.csv")
#Deaths in South Korea
ourfinaldata %>% filter(Country_Region == "South Korea") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("South Korea Cases and Deaths")

#Deaths in Italy
ourfinaldata %>% filter(Country_Region == "Italy") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Italy Cases and Deaths")

#Deaths in Spain
ourfinaldata %>% filter(Country_Region == "Spain") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Spain Cases and Deaths")

#Deaths in Iran
ourfinaldata %>% filter(Country_Region == "Iran") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Iran Cases and Deaths")

#Deaths in Sweeden
ourfinaldata %>% filter(Country_Region == "Sweden") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Sweden Cases and Deaths")

#Deaths in Canada
ourfinaldata %>% filter(Country_Region == "Canada") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Canada Cases and Deaths")

#Deaths in Japan
ourfinaldata %>% filter(Country_Region == "Japan") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Japan Cases and Deaths")

#Deaths in Costa Rica
ourfinaldata %>% filter(Country_Region == "Costa Rica") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Costa Rica Cases and Deaths")

#Deaths in Germany
ourfinaldata %>% filter(Country_Region == "Germany") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Germany Cases and Deaths")

#Deaths in United Kingdom
ourfinaldata %>% filter(Country_Region == "United Kingdom"& is.na(Province_State)) %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("United Kingdom Cases and Deaths")

#Deaths in France
ourfinaldata %>% filter(Country_Region == "France" & is.na(Province_State)) %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("France Cases and Deaths")

# Cases (dashed line) and deaths (red line) over time, one plot per country.
# The same two-layer plot was previously copy-pasted once per country.
trend_countries <- c(
  "Russia", "Egypt", "South Africa", "Finland", "Austria", "Portugal",
  "Indonesia", "Australia", "Mexico", "Serbia", "Iceland", "Afghanistan",
  "Norway"
)

for (country in trend_countries) {
  country_plot <- ourfinaldata %>%
    # `!!` forces the loop value, so a data column that happened to be
    # called `country` could never shadow it.
    filter(Country_Region == !!country) %>%
    ggplot() +
    geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
    geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
    ggtitle(paste(country, "Cases and Deaths"))
  # ggplot objects are not auto-printed inside a loop.
  print(country_plot)
}

# Comparison of total cases in each country (one coloured line per country)
ourfinaldata %>%
  filter(
    Country_Region %in% c(
      "United States", "Italy", "China", "Spain", "Iran",
      "Australia", "South Korea", "Canada", "Brazil"
    )
  ) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_line() +
  ggtitle("Total Confirmed Cases Over Time")

# Same countries, deaths instead of confirmed cases
ourfinaldata %>%
  filter(
    Country_Region %in% c(
      "United States", "Italy", "China", "Spain", "Iran",
      "Australia", "South Korea", "Canada", "Brazil"
    )
  ) %>%
  ggplot(mapping = aes(x = Date, y = Deaths, color = Country_Region)) +
  geom_line() +
  ggtitle("Total Confirmed Deaths Over Time")

Plots of the Percentage of Population Infected
# Percentage of each country's population with a confirmed case, over time.
# Note: a percentage axis would normally run 0-100; it is capped at 1 here to
# show how few people have it at this point. ggplot drops (with a warning) any
# value that falls outside that range.

# Names are the Country_Region values; values are the wording used in the title.
infection_title_labels <- c(
  "United States" = "the USA",
  "China" = "China",
  "Italy" = "Italy",
  "Spain" = "Spain",
  "India" = "India",
  "Iran" = "Iran",
  "South Africa" = "South Africa"
)

for (country in names(infection_title_labels)) {
  percent_plot <- ourfinaldata %>%
    filter(Country_Region == !!country) %>%
    group_by(Date) %>%
    # Population_2020 is constant within a country, so take the first value
    # instead of relying on every date having exactly one row.
    summarize(Total = sum(Confirmed), Pop = first(Population_2020)) %>%
    mutate(Percent_Infected = (Total / Pop) * 100) %>%
    ggplot() +
    geom_line(mapping = aes(x = Date, y = Percent_Infected)) +
    ylim(0, 1) +
    ggtitle(paste(
      "Percentage of People Infected in",
      infection_title_labels[[country]]
    ))
  # ggplot objects are not auto-printed inside a loop.
  print(percent_plot)
}

Logistic Growth for Italy
ourfinaldata %>% filter(Country_Region == "Italy") %>% group_by(Date) %>% ggplot() + geom_line(aes(x = Date, y = Confirmed,group =1), linetype = "dashed") + geom_line(aes(x = Date, y = Deaths,group =1), color = "red")+ ggtitle("Italy Cases and Deaths")

# Italy rows from the first confirmed case onward, numbered by day.
# Sorting by Date first makes the day index independent of the incoming row
# order; row_number() also copes with an empty result, unlike 1:n.
Italy_dat <- ourfinaldata %>%
  filter(Country_Region == "Italy", Confirmed >= 1) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Italy <- nrow(Italy_dat)

# Three-parameter logistic growth curve: Confirmed = c / (1 + a * exp(b * day)),
# where c is the plateau; starting values are rough guesses for the optimiser.
Italy_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 100, b = -0.13, c = max(Italy_dat$Confirmed)),
  data = Italy_dat
)
coef(summary(Italy_mod))
Estimate Std. Error t value Pr(>|t|)
a 3.861112e+02 4.393362e+01 8.788513 8.127566e-15
b -9.225626e-02 1.846467e-03 -49.963657 2.830435e-86
c 2.294176e+05 1.094010e+03 209.703429 2.572178e-165
# Observed confirmed cases with the fitted logistic curve on top.
plot(
  Italy_dat$Num_Days_Since_Start, Italy_dat$Confirmed,
  xlab = "Number of Days Since Start",
  ylab = "Number of Confirmed Cases",
  main = "Logistic Regression for Italy"
)
# Draw the curve from the current fit rather than from hand-copied numbers,
# which go stale every time the data (and therefore the fit) is refreshed.
Italy_coef <- coef(Italy_mod)
curve(
  Italy_coef[["c"]] / (1 + Italy_coef[["a"]] * exp(Italy_coef[["b"]] * x)),
  col = "blue", add = TRUE
)

#lines(Italy_dat$Num_Days_Since_Start, predict(Italy_mod), col = 2)
Logistic Growth for United States
# United States rows with more than one confirmed case, numbered by day.
# The rows are sorted by Date BEFORE the day index is assigned, so the index
# always follows calendar order (previously it was assigned first and the
# rows were sorted afterwards).
USA_dat <- ourfinaldata %>%
  filter(Country_Region == "United States", Confirmed > 1) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_USA <- nrow(USA_dat)
#Using Nonlinear Least Squares Logistic Growth Model
#USA_mod = nls(Confirmed ~ c/(1+a*exp(b*Num_Days_Since_Start)), start = list(a=30000.3, b = -0.10, c = max(USA_dat$Confirmed)), data = USA_dat)
#coef(summary(USA_mod))
Logistic Growth for China
# China rows with more than three confirmed cases, numbered by day.
# Sort by Date first so the day index always follows calendar order.
China_dat <- ourfinaldata %>%
  filter(Country_Region == "China", Confirmed > 3) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_China <- nrow(China_dat)

# Using Nonlinear Least Squares Logistic Growth Model:
# Confirmed = c / (1 + a * exp(b * day)), with c the plateau.
China_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 65.29, b = -0.223, c = 83787),
  data = China_dat
)
coef(summary(China_mod))
Estimate Std. Error t value Pr(>|t|)
a 55.3377334 4.854292e+00 11.39975 1.260588e-21
b -0.2104574 4.488438e-03 -46.88879 1.159049e-86
c 82924.2197982 1.685054e+02 492.11618 9.132441e-226
# Observed confirmed cases with the fitted logistic curve on top.
plot(
  China_dat$Num_Days_Since_Start, China_dat$Confirmed,
  xlab = "Number of Days Since Start",
  ylab = "Number of Confirmed Cases",
  main = "Logistic Regression for China"
)
# Use the coefficients of the current fit; the previously hard-coded values
# no longer matched the model estimated above.
China_coef <- coef(China_mod)
curve(
  China_coef[["c"]] / (1 + China_coef[["a"]] * exp(China_coef[["b"]] * x)),
  col = "blue", add = TRUE
)

Logistic Growth for Spain
# Spain rows with more than three confirmed cases, numbered by day.
# Sort by Date first so the day index always follows calendar order.
Spain_dat <- ourfinaldata %>%
  filter(Country_Region == "Spain", Confirmed > 3) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Spain <- nrow(Spain_dat)

# Using Nonlinear Least Squares Logistic Growth Model:
# Confirmed = c / (1 + a * exp(b * day)), with c the plateau.
Spain_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 30.29, b = -0.20, c = 191726),
  data = Spain_dat
)
coef(summary(Spain_mod))
Estimate Std. Error t value Pr(>|t|)
a 1.499439e+02 1.875001e+01 7.997004 1.869431e-12
b -1.249607e-01 3.193219e-03 -39.133135 4.826404e-64
c 2.320023e+05 1.127359e+03 205.792794 1.338163e-137
Developing Countries
List of developing countries in our dataset: Chad, Central African Republic, Bhutan, Afghanistan, Mali, Malawi, Eritrea, Haiti, Benin, Niger, Laos, Guinea, Cambodia, Mauritania, Liberia, Bangladesh, Georgia, Panama, Bulgaria
Logistic Growth for Developing Countries
Logistic Growth for Bulgaria
# Bulgaria: confirmed cases (dashed) and deaths (red) over time
ourfinaldata %>%
  filter(Country_Region == "Bulgaria") %>%
  ggplot() +
  geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
  ggtitle("Bulgaria Cases and Deaths")

# Bulgaria rows from the first confirmed case onward, numbered by day.
# The day count is taken from the filtered data itself. It used to be computed
# with a different threshold (Confirmed > 3) than the data (Confirmed >= 1),
# which makes mutate() fail as soon as the two row counts differ.
Bulgaria_dat <- ourfinaldata %>%
  filter(Country_Region == "Bulgaria", Confirmed >= 1) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Bulgaria <- nrow(Bulgaria_dat)

# Logistic growth model: Confirmed = c / (1 + a * exp(b * day)).
Bulgaria_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 16.7, b = -0.1489, c = 788.76),
  data = Bulgaria_dat
)
coef(summary(Bulgaria_mod))
Estimate Std. Error t value Pr(>|t|)
a 36.93230558 2.011606308 18.35961 1.613310e-32
b -0.06734297 0.001361859 -49.44930 4.574813e-68
c 2898.14046824 29.930246579 96.82982 2.224697e-94
Cambodia Logistic Growth
# Cambodia: confirmed cases (dashed) and deaths (red) over time
ourfinaldata %>%
  filter(Country_Region == "Cambodia") %>%
  ggplot() +
  geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
  ggtitle("Cambodia Cases and Deaths")

# Cambodia rows from the first confirmed case onward, numbered by day.
# Sorting by Date first keeps the day index in calendar order, and
# row_number() avoids repeating the filter just to count rows.
Cambodia_dat <- ourfinaldata %>%
  filter(Country_Region == "Cambodia", Confirmed >= 1) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Cambodia <- nrow(Cambodia_dat)

# Logistic growth model: Confirmed = c / (1 + a * exp(b * day)).
Cambodia_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 16.7, b = -0.1, c = 140),
  data = Cambodia_dat
)
coef(summary(Cambodia_mod))
Estimate Std. Error t value Pr(>|t|)
a 1.985966e+07 1.196141e+07 1.660311 9.920775e-02
b -3.041262e-01 1.091909e-02 -27.852713 2.354166e-57
c 1.217189e+02 4.299553e-01 283.096718 8.326562e-187
# Observed confirmed cases with the fitted logistic curve on top.
plot(
  Cambodia_dat$Num_Days_Since_Start, Cambodia_dat$Confirmed,
  xlab = "Number of Days Since Start",
  ylab = "Number of Confirmed Cases",
  main = "Logistic Regression for Cambodia"
)
# Use the coefficients of the current fit; the previously hard-coded values
# no longer matched the model estimated above.
Cambodia_coef <- coef(Cambodia_mod)
curve(
  Cambodia_coef[["c"]] /
    (1 + Cambodia_coef[["a"]] * exp(Cambodia_coef[["b"]] * x)),
  col = 4, add = TRUE
)

#lines(Cambodia_dat$Num_Days_Since_Start, predict(Cambodia_mod), col = 2)
# Confirmed cases over time for five selected countries, one coloured line each.
ourfinaldata %>%
  filter(
    Country_Region %in% c("Panama", "Norway", "Georgia", "Bangladesh", "Guinea")
  ) %>%
  group_by(Date, Country_Region) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_line() +
  ggtitle("Confirmed Cases across the World")

Junk
# Brazil: confirmed cases (dashed) and deaths (red) over time
ourfinaldata %>%
  filter(Country_Region == "Brazil") %>%
  group_by(Date) %>%
  ggplot(aes(x = Date, group = 1)) +
  geom_line(aes(y = Confirmed), linetype = "dashed") +
  geom_line(aes(y = Deaths), color = "red") +
  ggtitle("Brazil Cases and Deaths")

---
title: "Coronavirus_FinalProject"
output: html_notebook
editor_options: 
  chunk_output_type: inline
---

Population
GDP
Avg Temperature

```{r}
library(tidyverse)
library(anchors)
library(moderndive)
```

# Code to Set Up our CSV file
The code in this chunk below sums up all the cases for countries that are broken into regions so we can have one value per row for those countries.  We had to do this for the US, China, Canada, and Australia.
```{r}
# Load the raw case data (expects covid_19_clean_complete.csv in the working
# directory) and rename "US" to "United States". replace.value() comes from
# the anchors package.
virus <- read_csv("covid_19_clean_complete.csv")
virus <- replace.value(virus, "Country_Region", from = "US", to = "United States")

# Objects created in this chunk:
#   US_virus, China_virus, Canada_virus, Australia_virus
#       one row per date with the country's summed Confirmed and Deaths
#   No_US_virus (and its aliases)
#       every row of the countries that are NOT collapsed
#   country_virus*, predata, mydata, mydata2
#       stacked tables; mydata2 is the final one used by the next chunk
#
# Earlier versions of this chunk also summed Recovered and, for the first
# dataset layout, filtered out the cruise-ship rows; that commented-out code
# was written for older layouts of the dataset.

# Countries that are collapsed to a single row per date.
split_countries <- c("United States", "China", "Australia", "Canada")

# Sum Confirmed and Deaths over all rows of `country` for each date and label
# the result with the country name in both Province_State and Country_Region.
collapse_to_country <- function(data, country) {
  data %>%
    filter(Country_Region == !!country) %>%
    group_by(Date) %>%
    summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths)) %>%
    mutate(Province_State = !!country, Country_Region = !!country)
}

# United States: Province_State is split at the comma and only rows without a
# second piece (State is NA) are kept before summing. fill/extra reproduce
# separate()'s default result without its per-row warnings.
US_virus <- virus %>%
  filter(Country_Region == "United States") %>%
  separate(
    col = Province_State, into = c("City_or_County", "State"), sep = ",",
    fill = "right", extra = "drop"
  ) %>%
  filter(is.na(State)) %>%
  group_by(Date) %>%
  summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths)) %>%
  mutate(Province_State = "United States", Country_Region = "United States")

China_virus <- collapse_to_country(virus, "China")
Canada_virus <- collapse_to_country(virus, "Canada")
Australia_virus <- collapse_to_country(virus, "Australia")

# All remaining countries. Rows with a missing Country_Region are dropped,
# exactly as the original chain of `!=` comparisons did.
No_US_virus <- virus %>%
  filter(!is.na(Country_Region), !Country_Region %in% split_countries)
# These three were built with the very same filter; they are kept as aliases
# in case later code refers to them.
No_China_virus <- No_US_virus
No_Canada_virus <- No_US_virus
No_Australia_virus <- No_US_virus

# Stack with bind_rows() rather than full_join(): the tables share no rows, and
# a join on every common column would multiply any fully duplicated rows.
# Columns missing from the collapsed tables (Lat, Long, Recovered, Active,
# `WHO Region`) are filled with NA.
country_virus <- bind_rows(No_US_virus, US_virus)
country_virus %>% filter(Country_Region == "United States")

country_virus1 <- bind_rows(No_China_virus, China_virus)
country_virus11 <- bind_rows(No_Canada_virus, Canada_virus)
country_virus2 <- bind_rows(No_Australia_virus, Australia_virus)

# Same row order as before: other countries, US, China, Australia, Canada.
predata <- bind_rows(country_virus, China_virus)
mydata <- bind_rows(predata, Australia_virus)
mydata2 <- bind_rows(mydata, Canada_virus)
#mydata2 %>% filter(Country_Region == "Canada")
```

```{r}
# Show the per-country case table and the GDP / weather lookup table.
# NOTE(review): GDPandWeather and country_population_1 are not created in the
# visible part of this notebook; presumably they are already loaded in the
# session. GDPandWeather must already have a Country_Region column (the rename
# below is disabled).
mydata2
#GDPandWeather = GDPandWeather %>% rename("Country_Region"= Country)
GDPandWeather

# Give the population table the same key column name as the case data.
country_population_2 <- rename(country_population_1, "Country_Region" = Country)

# Attach GDP / weather first, then population; left joins keep every case row.
partialdata <- mydata2 %>%
  left_join(GDPandWeather, by = "Country_Region")
ourfinaldata <- partialdata %>%
  left_join(country_population_2, by = "Country_Region")

```

Filter by the previous day to make sure we have the most recent data.
```{r}
# Display the United States rows of the merged table for a visual check.
filter(ourfinaldata, Country_Region == "United States")

# Disabled: full table preview and the trimmed table once built for the poster.
#ourfinaldata
#poster = ourfinaldata %>% dplyr::select(-Province_State,-Recovered)
```


```{r}
# Save the merged table (cases joined with the GDP/weather and population
# lookups) to the working directory.
write_csv(ourfinaldata, "ourfinaldata.csv")
# NOTE(review): lubridate is attached here, mid-notebook, and masks
# base::date(). No lubridate function is called in the visible part of the
# file; confirm it is still needed before moving it to the first chunk.
library(lubridate)
```

# Code to Analyze our CSV file

```{r}
# Make sure Date is a Date object. The format (month/day/two-digit year) only
# matters when the column is still character; a column that is already a Date
# is returned unchanged by as.Date().
ourfinaldata$Date <- as.Date(ourfinaldata$Date, format = "%m/%d/%y")

# Worldwide totals per date, computed once and reused by both world plots.
world_totals <- ourfinaldata %>%
  group_by(Date) %>%
  summarize(
    Total = sum(Confirmed, na.rm = TRUE),
    Dead_count = sum(Deaths, na.rm = TRUE)
  )

# World plot 1: confirmed cases, line width scaled by the death count.
ggplot(world_totals) +
  geom_line(aes(x = Date, y = Total, group = 1, size = Dead_count)) +
  ggtitle("World Cases and Deaths Graph 1")

# World plot 2: confirmed cases (dashed) and deaths (red) on the same axes.
ggplot(world_totals) +
  geom_line(aes(x = Date, y = Total, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Dead_count, group = 1), color = "red") +
  ggtitle("World Cases and Deaths Graph 2")

# Plot confirmed cases (dashed) and deaths (red) over time.
#   data  - rows to plot; needs Date, Confirmed and Deaths columns
#   title - plot title
# Returns the ggplot object, which is auto-printed when called at top level.
plot_cases_and_deaths <- function(data, title) {
  ggplot(data) +
    geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
    geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
    ggtitle(title)
}

# Cases and deaths in China
ourfinaldata %>%
  filter(Country_Region == "China") %>%
  plot_cases_and_deaths("China Cases and Deaths")

# Cases and deaths in the United States
ourfinaldata %>%
  filter(Country_Region == "United States") %>%
  plot_cases_and_deaths("United States Cases and Deaths")

# United States rows in date order, also exported for use outside the notebook.
USA_trends <- ourfinaldata %>%
  filter(Country_Region == "United States") %>%
  group_by(Date) %>%
  arrange(Date)
write_csv(USA_trends, "USA_trends.csv")

# Cases and deaths in South Korea
ourfinaldata %>%
  filter(Country_Region == "South Korea") %>%
  plot_cases_and_deaths("South Korea Cases and Deaths")

# Cases and deaths in Italy
ourfinaldata %>%
  filter(Country_Region == "Italy") %>%
  plot_cases_and_deaths("Italy Cases and Deaths")

# Cases and deaths in Spain
ourfinaldata %>%
  filter(Country_Region == "Spain") %>%
  plot_cases_and_deaths("Spain Cases and Deaths")

# Cases and deaths in Iran
ourfinaldata %>%
  filter(Country_Region == "Iran") %>%
  plot_cases_and_deaths("Iran Cases and Deaths")

# Cases and deaths in Sweden
ourfinaldata %>%
  filter(Country_Region == "Sweden") %>%
  plot_cases_and_deaths("Sweden Cases and Deaths")
```

```{r}
# Confirmed cases (dashed) and deaths (red) over time, one plot per country.
# The plots are built inside a loop, so each one needs an explicit print().
# No group_by(Date) is needed: both layers set group = 1 explicitly.
plot_countries <- c(
  "Canada", "Japan", "Costa Rica", "Germany", "United Kingdom", "France",
  "Russia", "Egypt", "South Africa"
)
# For these countries only rows without a Province_State are plotted
# (presumably the national row rather than overseas territories -- confirm).
no_province_only <- c("United Kingdom", "France")

for (country_name in plot_countries) {
  country_rows <- ourfinaldata %>%
    # `!!` forces the loop variable so it can never be mistaken for a column.
    filter(Country_Region == !!country_name)
  if (country_name %in% no_province_only) {
    country_rows <- country_rows %>% filter(is.na(Province_State))
  }
  country_plot <- ggplot(country_rows) +
    geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
    geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
    ggtitle(paste(country_name, "Cases and Deaths"))
  print(country_plot)
}
```


```{r}
# Confirmed cases (dashed) and deaths (red) over time for Finland, Austria,
# Portugal, Indonesia, Australia and Mexico, one plot per country.
# The plots are built inside a loop, so each one needs an explicit print().
# No group_by(Date) is needed: both layers set group = 1 explicitly.
for (country_name in c(
  "Finland", "Austria", "Portugal", "Indonesia", "Australia", "Mexico"
)) {
  country_plot <- ourfinaldata %>%
    # `!!` forces the loop variable so it can never be mistaken for a column.
    filter(Country_Region == !!country_name) %>%
    ggplot() +
    geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
    geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
    ggtitle(paste(country_name, "Cases and Deaths"))
  print(country_plot)
}

```

```{r}
# Confirmed cases (dashed) and deaths (red) over time for Serbia, Iceland,
# Afghanistan and Norway, one plot per country.
# The plots are built inside a loop, so each one needs an explicit print().
# No group_by(Date) is needed: both layers set group = 1 explicitly.
for (country_name in c("Serbia", "Iceland", "Afghanistan", "Norway")) {
  country_plot <- ourfinaldata %>%
    # `!!` forces the loop variable so it can never be mistaken for a column.
    filter(Country_Region == !!country_name) %>%
    ggplot() +
    geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
    geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
    ggtitle(paste(country_name, "Cases and Deaths"))
  print(country_plot)
}
```

```{r}
# Compare confirmed cases and deaths over time across a fixed set of
# countries, one coloured line per country.
comparison_countries <- c(
  "United States", "Italy", "China", "Spain", "Iran",
  "Australia", "South Korea", "Canada", "Brazil"
)
comparison_data <- filter(ourfinaldata, Country_Region %in% comparison_countries)

# Confirmed cases.
ggplot(comparison_data, mapping = aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_line() +
  ggtitle("Total Confirmed Cases Over Time")

# Deaths.
ggplot(comparison_data, mapping = aes(x = Date, y = Deaths, color = Country_Region)) +
  geom_line() +
  ggtitle("Total Confirmed Deaths Over Time")
```



Plots of the Percentage of Population Infected
```{r}

# Percentage of each country's population with a confirmed infection, by date.
# Note: a percentage could span 0-100, but the y-axis is limited to 0-1 to show
# how few people have it at this point.
# Names are the Country_Region values; values are the wording used in the title.
infected_title_places <- c(
  "United States" = "the USA",
  "China" = "China",
  "Italy" = "Italy",
  "Spain" = "Spain",
  "India" = "India",
  "Iran" = "Iran",
  "South Africa" = "South Africa"
)

for (country_name in names(infected_title_places)) {
  percent_plot <- ourfinaldata %>%
    # `!!` forces the loop variable so it can never be mistaken for a column.
    filter(Country_Region == !!country_name) %>%
    group_by(Date) %>%
    summarize(
      Total = sum(Confirmed),
      # summarize() needs one value per group; first() guarantees that even if
      # a date ever has more than one row.
      Pop = first(Population_2020)
    ) %>%
    mutate(Percent_Infected = (Total / Pop) * 100) %>%
    ggplot() +
    geom_line(mapping = aes(x = Date, y = Percent_Infected)) +
    ylim(0, 1) +
    ggtitle(paste(
      "Percentage of People Infected in",
      infected_title_places[[country_name]]
    ))
  # Plots built inside a loop are only drawn when printed explicitly.
  print(percent_plot)
}
```


# Logistic Growth for Italy
```{r}
# Italy: confirmed cases (dashed) and deaths (red) over time.
ourfinaldata %>%
  filter(Country_Region == "Italy") %>%
  ggplot() +
  geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
  ggtitle("Italy Cases and Deaths")

# Rows from the first confirmed case onwards, numbered 1..n in date order.
# Sorting before numbering keeps the day index correct whatever the row order
# of ourfinaldata; row_number() is also safe when no rows match.
Italy_dat <- ourfinaldata %>%
  filter(Country_Region == "Italy", Confirmed >= 1) %>%
  ungroup() %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Italy <- nrow(Italy_dat)

# Logistic growth curve c / (1 + a * exp(b * t)) fitted by nonlinear least
# squares; the starting value for c is the largest observed case count.
Italy_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 100, b = -0.13, c = max(Italy_dat$Confirmed)),
  data = Italy_dat
)
coef(summary(Italy_mod))

plot(
  Italy_dat$Num_Days_Since_Start, Italy_dat$Confirmed,
  xlab = "Number of Days Since Start",
  ylab = "Number of Confirmed Cases",
  main = "Logistic Regression for Italy"
)
# Draw the curve from the fitted coefficients rather than numbers copied from
# an earlier run, so the overlay always matches the current model.
Italy_fit <- coef(Italy_mod)
curve(
  Italy_fit[["c"]] / (1 + Italy_fit[["a"]] * exp(Italy_fit[["b"]] * x)),
  col = "blue", add = TRUE
)
#lines(Italy_dat$Num_Days_Since_Start, predict(Italy_mod), col = 2)


```

# Logistic Growth for United States
```{r}
# United States rows with more than one confirmed case, numbered 1..n in date
# order. The data is sorted BEFORE the day index is assigned, so the index
# follows the calendar whatever the row order of ourfinaldata.
USA_dat <- ourfinaldata %>%
  filter(Country_Region == "United States", Confirmed > 1) %>%
  ungroup() %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_USA <- nrow(USA_dat)

#Using Nonlinear Least Squares Logistic Growth Model
#USA_mod = nls(Confirmed ~  c/(1+a*exp(b*Num_Days_Since_Start)), start = list(a=30000.3, b = -0.10, c = max(USA_dat$Confirmed)), data = USA_dat)
#coef(summary(USA_mod))
```

# Logistic Growth for China
```{r}
# China rows with more than three confirmed cases, numbered 1..n in date order.
# The data is sorted BEFORE the day index is assigned, so the index follows
# the calendar whatever the row order of ourfinaldata.
China_dat <- ourfinaldata %>%
  filter(Country_Region == "China", Confirmed > 3) %>%
  ungroup() %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_China <- nrow(China_dat)

# Logistic growth curve c / (1 + a * exp(b * t)) fitted by nonlinear least
# squares from hand-picked starting values.
China_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 65.29, b = -0.223, c = 83787),
  data = China_dat
)
coef(summary(China_mod))

plot(
  China_dat$Num_Days_Since_Start, China_dat$Confirmed,
  xlab = "Number of Days Since Start",
  ylab = "Number of Confirmed Cases",
  main = "Logistic Regression for China"
)
# Draw the curve from the fitted coefficients rather than numbers copied from
# an earlier run, so the overlay always matches the current model.
China_fit <- coef(China_mod)
curve(
  China_fit[["c"]] / (1 + China_fit[["a"]] * exp(China_fit[["b"]] * x)),
  col = "blue", add = TRUE
)
```

# Logistic Growth for Spain
```{r}
# Spain rows with more than three confirmed cases, numbered 1..n in date order.
# The data is sorted BEFORE the day index is assigned, so the index follows
# the calendar whatever the row order of ourfinaldata.
Spain_dat <- ourfinaldata %>%
  filter(Country_Region == "Spain", Confirmed > 3) %>%
  ungroup() %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Spain <- nrow(Spain_dat)

# Logistic growth curve c / (1 + a * exp(b * t)) fitted by nonlinear least
# squares from hand-picked starting values.
Spain_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 30.29, b = -0.20, c = 191726),
  data = Spain_dat
)
coef(summary(Spain_mod))
```

# County/State Data for US

```{r}
# County-level US data; Date is converted from month/day/two-digit-year text.
my_counties <- read_csv("usa_county_wise.csv")
my_counties$Date <- as.Date(my_counties$Date, format = "%m/%d/%y")

# Date used for every single-day snapshot in this chunk.
snapshot_date <- as.Date("2020-06-10")

# Pennsylvania counties on the snapshot date.
my_counties %>%
  filter(Province_State == "Pennsylvania" & Date == snapshot_date)

# Confirmed cases over time for a selection of states (counties summed).
compared_states <- c(
  "Pennsylvania", "New York", "New Jersey", "Delaware",
  "California", "Michigan", "Arizona"
)
my_counties %>%
  filter(Province_State %in% compared_states) %>%
  group_by(Date, Province_State) %>%
  summarize(state_total = sum(Confirmed)) %>%
  ggplot(mapping = aes(x = Date, y = state_total, color = Province_State)) +
  geom_line() +
  ggtitle("Confirmed Cases across the States")

# State totals on the snapshot date, computed once and reused below.
state_totals <- my_counties %>%
  filter(Date == snapshot_date) %>%
  group_by(Province_State) %>%
  summarize(state_total = sum(Confirmed))

# The 20 states with the most, then the 20 with the fewest, confirmed cases.
state_totals %>%
  top_n(n = 20, wt = state_total) %>%
  arrange(desc(state_total))
state_totals %>%
  top_n(n = -20, wt = state_total) %>%
  arrange(desc(state_total))

# Selected Pennsylvania counties, keeping each county's 20 highest
# confirmed-case rows (ties are kept). The arguments are named because
# top_n()'s positional order is (x, n, wt): the unnamed call
# top_n(Confirmed, 20) bound n = Confirmed and wt = 20 instead of
# selecting the top 20 by Confirmed.
my_counties %>%
  filter(Province_State == "Pennsylvania") %>%
  filter(Admin2 %in% c("Lehigh", "Northampton", "Berks", "Luzerne")) %>%
  group_by(Admin2) %>%
  top_n(n = 20, wt = Confirmed) %>%
  ggplot(aes(x = Date, y = Confirmed, color = Admin2)) +
  geom_line()

# Pennsylvania counties on the snapshot date, most cases first, with deaths per
# confirmed case. Counties with no confirmed cases get NA instead of NaN/Inf.
my_counties %>%
  filter(Province_State == "Pennsylvania" & Date == snapshot_date) %>%
  arrange(desc(Confirmed)) %>%
  mutate(DeathRate = if_else(Confirmed > 0, Deaths / Confirmed, NA_real_))
```

# Developing Countries
List of developing countries in our dataset:
Chad, Central African Republic, Bhutan, Afghanistan, Mali, Malawi, Eritrea, Haiti, Benin, Niger, Laos, Guinea, Cambodia, Mauritania, Liberia, Bangladesh, Georgia, Panama, Bulgaria

# Logistic Growth for Developing Countries
Logistic Growth for Bulgaria
```{r}
#Bulgaria
# Bulgaria: confirmed cases (dashed) and deaths (red) over time.
ourfinaldata %>%
  filter(Country_Region == "Bulgaria") %>%
  ggplot() +
  geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
  ggtitle("Bulgaria Cases and Deaths")

# Rows from the first confirmed case onwards, numbered 1..n in date order.
# The day index is derived from the filtered rows themselves. Previously the
# row count used `Confirmed > 3` while the data used `Confirmed >= 1`, so the
# index vector could have a different length than the data.
Bulgaria_dat <- ourfinaldata %>%
  filter(Country_Region == "Bulgaria", Confirmed >= 1) %>%
  ungroup() %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Bulgaria <- nrow(Bulgaria_dat)

# Logistic growth curve c / (1 + a * exp(b * t)) fitted by nonlinear least
# squares from hand-picked starting values.
Bulgaria_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 16.7, b = -0.1489, c = 788.76),
  data = Bulgaria_dat
)
coef(summary(Bulgaria_mod))

```

# Cambodia Logistic Growth
```{r}
# Cambodia: confirmed cases (dashed) and deaths (red) over time.
ourfinaldata %>%
  filter(Country_Region == "Cambodia") %>%
  ggplot() +
  geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
  ggtitle("Cambodia Cases and Deaths")

# Rows from the first confirmed case onwards, numbered 1..n in date order.
# Sorting before numbering keeps the day index correct whatever the row order
# of ourfinaldata; row_number() is also safe when no rows match.
Cambodia_dat <- ourfinaldata %>%
  filter(Country_Region == "Cambodia", Confirmed >= 1) %>%
  ungroup() %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Cambodia <- nrow(Cambodia_dat)

# Logistic growth curve c / (1 + a * exp(b * t)) fitted by nonlinear least
# squares from hand-picked starting values.
Cambodia_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 16.7, b = -0.1, c = 140),
  data = Cambodia_dat
)

coef(summary(Cambodia_mod))

plot(
  Cambodia_dat$Num_Days_Since_Start, Cambodia_dat$Confirmed,
  xlab = "Number of Days Since Start",
  ylab = "Number of Confirmed Cases",
  main = "Logistic Regression for Cambodia"
)
# Draw the curve from the fitted coefficients rather than numbers copied from
# an earlier run, so the overlay always matches the current model.
Cambodia_fit <- coef(Cambodia_mod)
curve(
  Cambodia_fit[["c"]] /
    (1 + Cambodia_fit[["a"]] * exp(Cambodia_fit[["b"]] * x)),
  col = 4, add = TRUE
)
#lines(Cambodia_dat$Num_Days_Since_Start, predict(Cambodia_mod), col = 2)
```

```{r}
# Confirmed cases over time for a handful of countries, one coloured line each.
world_comparison_countries <- c(
  "Panama", "Norway", "Georgia", "Bangladesh", "Guinea"
)
world_comparison_rows <- group_by(
  filter(ourfinaldata, Country_Region %in% world_comparison_countries),
  Date, Country_Region
)
ggplot(
  world_comparison_rows,
  mapping = aes(x = Date, y = Confirmed, color = Country_Region)
) +
  geom_line() +
  ggtitle("Confirmed Cases across the World")
```

# Junk
```{r}
# Brazil: confirmed cases (dashed) and deaths (red) over time.
brazil_rows <- filter(ourfinaldata, Country_Region == "Brazil")
brazil_rows <- group_by(brazil_rows, Date)
ggplot(brazil_rows) +
  geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
  ggtitle("Brazil Cases and Deaths")
```

